Example data set

data <- makeExampleData()
class(data)
## [1] "list"
class(data$view_1)
## [1] "matrix"
data$view_1[1:5, 1:5]
##             sample_1    sample_2   sample_3   sample_4    sample_5
## feature1  1.04292020  0.42254613  0.3651112 -0.7398621 -0.85776528
## feature2 -0.48673662  0.08065751  0.4987665  0.5177779  0.85860367
## feature3  0.79444546 -0.15074124 -0.4359495 -1.3791868 -0.77422531
## feature4 -0.04001955 -0.15697565  0.2672643  0.5078698  0.16426913
## feature5  0.35637070  0.17460373 -0.2218602 -0.3179047 -0.03301396
data$view_2[1:5, 1:5]
##             sample_1    sample_2   sample_3    sample_4     sample_5
## feature1 -0.30478573  0.18945287  1.2142699 -0.02885239  1.698523220
## feature2 -1.29607501 -0.35929397  0.1772687  0.29080186 -0.008588118
## feature3  1.17441443  0.49384601  0.1681526  1.17310775 -1.123760451
## feature4  1.80477274 -0.06750705 -1.2368071  0.38068882 -1.146340648
## feature5 -0.00791598  0.16114470 -0.3845192  0.70679625 -0.375288005
data$view_3[1:5, 1:5]
##            sample_1    sample_2   sample_3    sample_4    sample_5
## feature1  0.2612291 -0.08213791  0.2666322  0.04455182 -0.12219337
## feature2 -0.4166879  0.29339382  0.1228433 -0.51984874  0.43640303
## feature3 -0.6145921 -0.69143575 -0.2048873 -0.32561906 -0.04718821
## feature4 -0.3467249  0.43780264  2.0658769 -0.64374345  1.79271128
## feature5  0.3152318 -0.07858969  0.4348445 -0.54362060 -0.17295689
summary(data$view_1[,1:10])
##     sample_1           sample_2            sample_3           sample_4       
##  Min.   :-2.45643   Min.   :-0.914858   Min.   :-0.97372   Min.   :-1.89039  
##  1st Qu.:-0.29010   1st Qu.:-0.257194   1st Qu.:-0.20457   1st Qu.:-0.21977  
##  Median :-0.01496   Median : 0.003731   Median : 0.03626   Median : 0.03563  
##  Mean   :-0.01638   Mean   :-0.007379   Mean   : 0.05734   Mean   : 0.07871  
##  3rd Qu.: 0.32315   3rd Qu.: 0.222240   3rd Qu.: 0.28475   3rd Qu.: 0.36983  
##  Max.   : 1.26031   Max.   : 0.817327   Max.   : 0.76781   Max.   : 2.88071  
##     sample_5           sample_6          sample_7           sample_8       
##  Min.   :-1.86196   Min.   :-2.7456   Min.   :-0.89870   Min.   :-3.59294  
##  1st Qu.:-0.37314   1st Qu.:-0.3381   1st Qu.:-0.31557   1st Qu.:-0.49843  
##  Median : 0.02583   Median : 0.1538   Median : 0.02052   Median :-0.02190  
##  Mean   : 0.07326   Mean   : 0.1024   Mean   :-0.01743   Mean   :-0.07599  
##  3rd Qu.: 0.44331   3rd Qu.: 0.5084   3rd Qu.: 0.19943   3rd Qu.: 0.30064  
##  Max.   : 2.83617   Max.   : 3.2860   Max.   : 1.02104   Max.   : 2.64937  
##     sample_9          sample_10       
##  Min.   :-0.95906   Min.   :-4.91373  
##  1st Qu.:-0.19187   1st Qu.:-0.50626  
##  Median :-0.02509   Median : 0.03421  
##  Mean   :-0.01973   Mean   :-0.10279  
##  3rd Qu.: 0.17396   3rd Qu.: 0.46511  
##  Max.   : 0.89147   Max.   : 3.61538
set.seed(1234)
MOFAobject <- createMOFAobject(data)
## Creating MOFA object from list of matrices,
##  please make sure that samples are columns and features are rows...
plotDataOverview(MOFAobject)

Prepare MOFA: Set the training and model options

TrainOptions <- getDefaultTrainOptions()
ModelOptions <- getDefaultModelOptions(MOFAobject)
DataOptions <- getDefaultDataOptions()

TrainOptions$DropFactorThreshold <- 0.01

Run MOFA

n_inits <- 3
MOFAlist <- lapply(seq_len(n_inits), function(it) {
  
  TrainOptions$seed <- 2018 + it
  
  MOFAobject <- prepareMOFA(MOFAobject,
                            DataOptions = DataOptions,
                            ModelOptions = ModelOptions,
                            TrainOptions = TrainOptions
)
  
  runMOFA(MOFAobject)
})
## Checking data options...
## Checking training options...
## Checking model options...
## [1] "No output file provided, using a temporary file..."
## Checking data options...
## Checking training options...
## Checking model options...
## [1] "No output file provided, using a temporary file..."
## Checking data options...
## Checking training options...
## Checking model options...
## [1] "No output file provided, using a temporary file..."

Compare different random inits and select the best model

compareModels(MOFAlist)

With compareFactors we can get an overview of how robust the factors are between different model instances.

compareFactors(MOFAlist)

For down-stream analyses we recommned to choose the model with the best ELBO value as is done by selectModel.

MOFAobject <- selectModel(MOFAlist, plotit = FALSE)
MOFAobject
## Trained MOFA model with the following characteristics:
##   Number of views: 3 
##  View names: view_1 view_2 view_3 
##   Number of features per view: 100 100 100 
##   Number of samples: 50 
##   Number of factors: 5

Downstream analyses

On the trained MOFAobject we can now start looking into the inferred factors, its weights etc. Here the data was generated using five factors, whose activity patterns we can recover using plotVarianceExplained.

plotVarianceExplained(MOFAobject)

Mafe’s data (CyTOF, RNA Seq, WES)

library(dplyr)
library(tidyr)
prcnt <- 0.1 # cutoff: genes must be mutated in prcnt*100 % of the samples
mut_dt_sum <- as_tibble(mut_dt) %>% 
  mutate(sum = rowSums(., na.rm = TRUE), 
         genes = rownames(mut_dt)) %>%
  arrange(., desc(sum)) %>%
  select(genes, sum) %>%
  filter(., sum > 53*prcnt)

mut_dt <- mut_dt[mut_dt_sum$genes,]

Preprocessing data

mofa_data <- list('CyTOF_exp'=CyTOF_exp, 
                  'CyTOF_prcnt'=CyTOF_prcnt, 
                  'RNA_topgenes'=RNA_topclust_E, 
                  'RNA_xcell'=RNA_xcell,
                  'mutation'=mut_dt)
set.seed(1234)
MOFAobject <- createMOFAobject(mofa_data)
## Creating MOFA object from list of matrices,
##  please make sure that samples are columns and features are rows...
plotDataOverview(MOFAobject)

Prepare MOFA: Set the training and model options

The most important options the user needs to define are:

scaleViews: logical indicating whether to scale views to have unit variance. As long as the scale of the different data sets is not too high, this is not required. Default is FALSE.

removeIncompleteSamples: logical indicating whether to remove samples that are not profiled in all omics. The model can cope with missing assays, so this option is not required. Default is FALSE.

Data options

DataOptions <- getDefaultDataOptions()
DataOptions 
## $scaleViews
## [1] FALSE
## 
## $removeIncompleteSamples
## [1] FALSE

Model options

Next, we define model options. The most important are:

numFactors: number of factors (default is 0.5 times the number of samples). By default, the model will only remove a factor if it explains exactly zero variance in the data. You can increase this threshold on minimum variance explained by setting TrainOptions$dropFactorThreshold to a value higher than zero.

likelihoods: likelihood for each view. Usually we recommend gaussian for continuous data, bernoulli for binary data and poisson for count data. By default, the model tries to guess it from the data.

sparsity: do you want to use sparsity? This makes the interpretation easier so it is recommended (Default is TRUE).

ModelOptions <- getDefaultModelOptions(MOFAobject)
ModelOptions
## $likelihood
##    CyTOF_exp  CyTOF_prcnt RNA_topgenes    RNA_xcell     mutation 
##   "gaussian"   "gaussian"   "gaussian"   "gaussian"  "bernoulli" 
## 
## $numFactors
## [1] 45
## 
## $sparsity
## [1] TRUE

Training options

Next, we define training options. The most important are:

maxiter: maximum number of iterations. Ideally set it large enough and use the convergence criterion TrainOptions$tolerance.

tolerance: convergence threshold based on change in the evidence lower bound. For an exploratory run you can use a value between 1.0 and 0.1, but for a “final” model we recommend a value of 0.01.

DropFactorThreshold: hyperparameter to automatically learn the number of factors based on a minimum variance explained criteria. Factors explaining less than DropFactorThreshold fraction of variation in all views will be removed. For example, a value of 0.01 means that factors that explain less than 1% of variance in all views will be discarded. By default this it zero, meaning that all factors are kept unless they explain no variance at all.

TrainOptions <- getDefaultTrainOptions()

# Automatically drop factors that explain less than 2% of variance in all omics
TrainOptions$DropFactorThreshold <- 0.02

TrainOptions$seed <- 2017

TrainOptions
## $maxiter
## [1] 5000
## 
## $tolerance
## [1] 0.1
## 
## $DropFactorThreshold
## [1] 0.02
## 
## $verbose
## [1] FALSE
## 
## $seed
## [1] 2017

Prepare MOFA

prepareMOFA internally performs a set of sanity checks and fills the DataOptions, TrainOptions and ModelOptions slots of the MOFAobject

MOFAobject <- prepareMOFA(
  MOFAobject, 
  DataOptions = DataOptions,
  ModelOptions = ModelOptions,
  TrainOptions = TrainOptions
)
## Checking data options...
## Checking training options...
## Checking model options...

Run MOFA

MOFAobject <- runMOFA(MOFAobject)
## [1] "No output file provided, using a temporary file..."

Analyse trained MOFA model

Disentangling the heterogeneity: calculation of variance explained by each factor in each view

# Calculate the variance explained (R2) per factor in each view 
r2 <- calculateVarianceExplained(MOFAobject)
r2$R2Total
##    CyTOF_exp  CyTOF_prcnt RNA_topgenes    RNA_xcell     mutation 
##    0.5552340    0.3183194    0.8501937    0.7161544    0.3137230
# Variance explained by each factor in each view
head(r2$R2PerFactor)
##        CyTOF_exp  CyTOF_prcnt RNA_topgenes    RNA_xcell     mutation
## LF1 3.472188e-01 0.1421290604 1.369124e-05 1.196426e-05 2.508180e-05
## LF2 2.666047e-05 0.0004605097 7.057021e-06 4.080459e-01 4.078439e-05
## LF3 2.050518e-05 0.0028526423 7.581071e-03 2.046978e-02 3.131551e-01
## LF4 3.276518e-05 0.0024545328 2.188586e-01 1.021090e-01 3.508591e-05
## LF5 5.244941e-05 0.0004696920 3.118061e-01 5.899720e-03 3.953266e-05
## LF6 6.214907e-05 0.0012019308 1.925714e-01 4.288120e-02 3.009597e-05
# Plot it
plotVarianceExplained(MOFAobject)

Characterization of individual factors

To get an overview of the weights across all factors in a given view you can use the plotWeightsHeatmap function.

plotWeightsHeatmap(
  MOFAobject, 
  view = "CyTOF_prcnt", 
  factors = 1:5,
  show_colnames = T, main = 'CyTOF_prcnt'
)

plotTopWeights(
  MOFAobject, 
  view="CyTOF_prcnt", 
  factor=3
)

plotTopWeights(
  MOFAobject, 
  view="CyTOF_prcnt", 
  factor=5
)

plotTopWeights(
  MOFAobject, 
  view="CyTOF_prcnt", 
  factor=1
)

plotTopWeights(
  MOFAobject, 
  view="CyTOF_prcnt", 
  factor=2
)

plotWeightsHeatmap(
  MOFAobject, 
  view = "CyTOF_exp", 
  factors = 1:5,
  show_colnames = FALSE, main = 'CyTOF_exp'
)

plotTopWeights(
  MOFAobject, 
  view="CyTOF_exp", 
  factor=3
)

plotTopWeights(
  MOFAobject, 
  view="CyTOF_exp", 
  factor=5
)

plotTopWeights(
  MOFAobject, 
  view="CyTOF_exp", 
  factor=1
)

plotTopWeights(
  MOFAobject, 
  view="CyTOF_exp", 
  factor=2
)

plotWeightsHeatmap(
  MOFAobject, 
  view = "RNA_topgenes", 
  factors = 1:5,
  show_colnames = FALSE, main = 'RNA_topgenes'
)

plotTopWeights(
  MOFAobject, 
  view="RNA_topgenes", 
  factor=1
)

plotWeightsHeatmap(
  MOFAobject, 
  view = "RNA_xcell", 
  factors = 1:5,
  show_colnames = T, main = 'RNA_xcell'
)

plotTopWeights(
  MOFAobject, 
  view="RNA_xcell", 
  factor=2
)

plotTopWeights(
  MOFAobject, 
  view="RNA_xcell", 
  factor=1
)

plotTopWeights(
  MOFAobject, 
  view="RNA_xcell", 
  factor=4
)

plotTopWeights(
  MOFAobject, 
  view="RNA_xcell", 
  factor=5
)

plotWeightsHeatmap(
  MOFAobject, 
  view = "mutation", 
  factors = 1:5,
  show_colnames = F, main = 'Mutation Data'
)

plotTopWeights(
  MOFAobject, 
  view="mutation", 
  factor=1
)

Feature set enrichment analysis in the active views

# Load reactome annotations
data("reactomeGS") # binary matrix with feature sets in rows and features in columns

# perform enrichment analysis
gsea <- runEnrichmentAnalysis(
  MOFAobject,
  view = "RNA_topgenes",
  feature.sets = reactomeGS,
  alpha = 0.01
)
## Doing Feature Set Enrichment Analysis with the following options...
## View: RNA_topgenes
## Factors: LF1 LF2 LF3 LF4 LF5 LF6 LF7 LF8 LF9 LF10 LF11 LF12 LF13
## Number of feature sets: 55
## Local statistic: loading
## Transformation: abs.value
## Global statistic: mean.diff
## Statistical test: parametric
plotEnrichmentBars(gsea, alpha=0.01)

interestingFactors <- 1:2

fseaplots <- lapply(interestingFactors, function(factor) {
  plotEnrichment(
    MOFAobject,
    gsea,
    factor = factor,
    alpha = 0.01,
    max.pathways = 10 # The top number of pathways to display
  )
})
## Warning in plotEnrichment(MOFAobject, gsea, factor = factor, alpha = 0.01, : No siginificant pathways at the specified alpha threshold. 
## 
##             For an overview use plotEnrichmentHeatmap() or plotEnrichmentBars().

## Warning in plotEnrichment(MOFAobject, gsea, factor = factor, alpha = 0.01, : No siginificant pathways at the specified alpha threshold. 
## 
##             For an overview use plotEnrichmentHeatmap() or plotEnrichmentBars().
cowplot::plot_grid(fseaplots[[1]], fseaplots[[2]],
                   ncol = 1, labels = paste("Factor", interestingFactors))

interestingFactors <- 3:4

fseaplots <- lapply(interestingFactors, function(factor) {
  plotEnrichment(
    MOFAobject,
    gsea,
    factor = factor,
    alpha = 0.01,
    max.pathways = 10 # The top number of pathways to display
  )
})
## Warning in plotEnrichment(MOFAobject, gsea, factor = factor, alpha = 0.01, : No siginificant pathways at the specified alpha threshold. 
## 
##             For an overview use plotEnrichmentHeatmap() or plotEnrichmentBars().
cowplot::plot_grid(fseaplots[[1]], fseaplots[[2]],
                   ncol = 1, labels = paste("Factor", interestingFactors))

interestingFactors <- 5:6

fseaplots <- lapply(interestingFactors, function(factor) {
  plotEnrichment(
    MOFAobject,
    gsea,
    factor = factor,
    alpha = 0.01,
    max.pathways = 10 # The top number of pathways to display
  )
})

cowplot::plot_grid(fseaplots[[1]], fseaplots[[2]],
                   ncol = 1, labels = paste("Factor", interestingFactors))

plotFactorScatter(
  MOFAobject,
  factors = 1:2,
  color_by = "ImmuneScore"      # color by the IGHV values that are part of the training data
  #shape_by = "trisomy12"  # shape by the trisomy12 values that are part of the training data
)

plotFactorScatters(
  MOFAobject,
  factors = c(1:5),
  color_by = "ImmuneScore"
)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

plotFactorScatters(
  MOFAobject,
  factors = c(1:5),
  color_by = "Th_cells"
)
## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

plotFactorScatters(
  MOFAobject,
  factors = c(1:5),
  color_by = "Tc_cells"
)
## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

plotFactorScatters(
  MOFAobject,
  factors = c(1:5),
  color_by = "Epi_174Yb_HLA-DR"
)
## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

plotFactorScatters(
  MOFAobject,
  factors = c(1:5),
  color_by = "KRAS"
)
## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

plotFactorScatters(
  MOFAobject,
  factors = c(1:5),
  color_by = "EGFR"
)
## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).